The data used in this notebook come from the COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. The data are retrieved through the `coronavirus` R package (https://github.com/RamiKrispin/coronavirus).
Accessed dataset on: 2020-08-11
# Load the `coronavirus` dataset and preview its first rows.
data("coronavirus")
head(coronavirus)

# Spell out the country name for rows labelled "US".
coronavirus$country[coronavirus$country == "US"] <- "United States"

# Mark blank province entries as missing (NA) instead of "".
coronavirus <- coronavirus %>%
  mutate(province = replace(province, province == "", NA))
# Population data ----
# Build `pop_2020`: one row per wpp2019 location with its 2020 population.
library(wpp2019)
data(pop)

# Keep only the location name and the 2020 column.
keeps <- c("name", "2020")
pop_2020 <- pop[keeps]
names(pop_2020)[2] <- "population"

# wpp2019 spelling -> spelling expected in `coronavirus$country`.
# Names not listed here keep their wpp2019 spelling.
country_renames <- c(
  "United States of America" = "United States",
  "Iran (Islamic Republic of)" = "Iran",
  "Russian Federation" = "Russia",
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Republic of Moldova" = "Moldova",
  "Venezuela (Bolivarian Republic of)" = "Venezuela"
)

pop_2020 <- pop_2020 %>%
  mutate(
    # Coerce first: replacing values in a factor with a label that is not an
    # existing level would produce NA (with a warning) instead of renaming.
    name = as.character(name),
    # Look each name up in the table; unmatched names give NA and fall back
    # to the original spelling.
    name = coalesce(unname(country_renames[name]), name),
    # Scale by 1000 to get head counts (wpp2019 documents `pop` as being in
    # thousands).
    population = population * 1000
  )
# Add the population of each country ----
# The join key is the country only, so every row of a country (including
# province-level rows) receives that country's population.
cases_pc_df <- left_join(coronavirus, pop_2020, by = c("country" = "name"))

# A left join keeps countries without a population match and fills
# `population` with NA. Report them so the gaps are visible rather than
# silently propagating into later per-capita calculations.
local({
  missing_pop <- setdiff(
    unique(as.character(coronavirus$country)),
    as.character(pop_2020$name)
  )
  if (length(missing_pop) > 0) {
    message(
      "No population match for ", length(missing_pop), " countries: ",
      paste(missing_pop, collapse = ", ")
    )
  }
})